//css_reference "core.dll";
//css_reference "Databases.dll";
//css_reference "utils.dll";

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Text;
using System.Windows.Forms;
using System.IO;
using System.Net;
using System.Collections;
using System.Web;
using System.Text.RegularExpressions;
using MediaPortal.Util;

/// <summary>
/// Script grabber that searches us.imdb.com for movie titles and scrapes the
/// details page of a selected hit by plain string/regex matching on the HTML.
/// </summary>
class Grabber : MediaPortal.Video.Database.IIMDBScriptGrabber
{
  public Grabber()
  {
  }

  /// <summary>
  /// Runs a title search and appends one <c>IMDBUrl</c> per hit to <paramref name="elements"/>.
  /// </summary>
  /// <param name="strSearch">Search text; it is appended to the query string unchanged.</param>
  /// <param name="iLimit">Maximum number of anchors to process from the result list.</param>
  /// <param name="elements">List that receives the <c>IMDBUrl</c> hits.</param>
  /// <remarks>Exceptions are logged and swallowed; hits added before the failure stay in the list.</remarks>
  void MediaPortal.Video.Database.IIMDBScriptGrabber.FindFilm(string strSearch, int iLimit, ArrayList elements)
  {
    int iCount = 0;
    string strTitle;
    try
    {
      string absoluteUri;
      string strURL = "http://us.imdb.com/Tsearch?title=" + strSearch;
      string strBody = GetPage(strURL, "utf-8", out absoluteUri);

      // Mars Warrior @ 03-sep-2004.
      // First try to find an Exact Match. If no exact match found, just look
      // for any match and add all those to the list. This narrows it down more easily...
      int iStartOfMovieList = strBody.IndexOf("Popular Titles");
      if (iStartOfMovieList < 0)
      {
        iStartOfMovieList = strBody.IndexOf("Exact Matches");
      }
      if (iStartOfMovieList < 0)
      {
        iStartOfMovieList = strBody.IndexOf("Partial Matches");
      }
      if (iStartOfMovieList < 0)
      {
        iStartOfMovieList = strBody.IndexOf("Approx Matches");
      }
      int endOfTitleList = strBody.IndexOf("Suggestions For Improving Your Results");

      if (iStartOfMovieList < 0)
      {
        // No result list: check whether the response looks like a single movie page
        // and, if so, report that one title.
        int iMovieTitle = strBody.IndexOf("<title>");
        int iOverview = strBody.IndexOf("Overview");
        int iMoviePlot = strBody.IndexOf("Plot");
        if (iMovieTitle >= 0 && iOverview >= 0 && iMoviePlot >= 0)
        {
          int iEnd = strBody.IndexOf("<", iMovieTitle + 7);
          if (iEnd > 0)
          {
            iMovieTitle += "<title>".Length;
            strTitle = strBody.Substring(iMovieTitle, iEnd - iMovieTitle);
            strTitle = MediaPortal.Util.Utils.stripHTMLtags(strTitle);
            HTMLUtil htmlUtil = new HTMLUtil();
            htmlUtil.ConvertHTMLToAnsi(strTitle, out strTitle);
            MediaPortal.Video.Database.IMDB.IMDBUrl url =
              new MediaPortal.Video.Database.IMDB.IMDBUrl(strURL, strTitle + " (imdb)", "IMDB");
            elements.Add(url);
          }
        }
        return;
      }

      // Cut the body down to the result list: it ends at the next "</table>", at the
      // "Suggestions" footer if that comes first, or at the end of the page.
      iStartOfMovieList += "<table>".Length;
      int iEndOfMovieList = strBody.IndexOf("</table>", iStartOfMovieList);
      if (iEndOfMovieList < 0)
      {
        iEndOfMovieList = strBody.Length;
      }
      if (endOfTitleList < iEndOfMovieList && endOfTitleList > iStartOfMovieList)
      {
        iEndOfMovieList = endOfTitleList;
      }
      strBody = strBody.Substring(iStartOfMovieList, iEndOfMovieList - iStartOfMovieList);

      while (iCount < iLimit)
      {
        ////<A HREF="/Title?0167261">Lord of the Rings: The Two Towers, The (2002)</A>
        int iAHREF = strBody.IndexOf("<a href=");
        if (iAHREF < 0)
        {
          break;
        }
        // Search for the closing tag from the anchor start so a stray "</a>" in front
        // of the anchor cannot produce a negative substring length.
        int iEndAHREF = strBody.IndexOf("</a>", iAHREF);
        if (iEndAHREF < 0)
        {
          break;
        }

        // The trailing '.' stands for the opening quote of the href value.
        iAHREF += "<a href=.".Length;
        if (iEndAHREF > iAHREF)
        {
          string strAHRef = strBody.Substring(iAHREF, iEndAHREF - iAHREF);
          int iURL = strAHRef.IndexOf(">");
          if (iURL > 0)
          {
            strTitle = "";
            strURL = strAHRef.Substring(0, iURL);
            if (strURL[strURL.Length - 1] == '\"')
            {
              strURL = strURL.Substring(0, strURL.Length - 1);
            }
            iURL++;
            int iURLEnd = strAHRef.IndexOf("<", iURL);
            if (iURLEnd > 0)
            {
              strTitle = strAHRef.Substring(iURL, iURLEnd - iURL);
            }
            else
            {
              strTitle = strAHRef.Substring(iURL);
            }

            // Drop an onclick attribute (and the quote in front of it) from the URL.
            int onclick = strURL.IndexOf(" onclick");
            if (onclick >= 0)
            {
              strURL = strURL.Substring(0, onclick - 1);
            }
            strURL = String.Format("http://us.imdb.com{0}", strURL);

            HTMLUtil htmlUtil = new HTMLUtil();
            htmlUtil.ConvertHTMLToAnsi(strTitle, out strTitle);

            // Append the plain text that follows the anchor up to the next tag.
            int endTagLength = "</a>".Length;
            int posNextTag = strBody.IndexOf("<", iEndAHREF + endTagLength);
            if (posNextTag > 0)
            {
              string strSub = strBody.Substring(iEndAHREF + endTagLength, posNextTag - (iEndAHREF + endTagLength));
              strTitle += strSub;
            }

            // to avoid including of &nbsp;
            // Skip entries whose text contains a line break or a non-breaking space
            // (either the raw entity or the decoded U+00A0 character).
            bool hasLineBreak = strTitle.IndexOf("\n") >= 0;
            bool hasNbsp = strTitle.IndexOf("&nbsp;") >= 0 || strTitle.IndexOf('\u00A0') >= 0;
            if (!hasLineBreak && !hasNbsp)
            {
              MediaPortal.Video.Database.IMDB.IMDBUrl url =
                new MediaPortal.Video.Database.IMDB.IMDBUrl(strURL, strTitle + " (imdb_com)", "imdb_com");
              elements.Add(url);
            }
            iCount++;
          }
        }

        // Continue scanning just behind the start of the processed "</a>".
        if (iEndAHREF + 1 >= strBody.Length)
        {
          break;
        }
        strBody = strBody.Substring(iEndAHREF + 1);
      }
    }
    catch (Exception ex)
    {
      MediaPortal.GUI.Library.Log.Error("exception for imdb lookup of {0} err:{1} stack:{2}", strSearch, ex.Message, ex.StackTrace);
    }
  }

  /// <summary>
  /// Downloads the page behind <paramref name="url"/> and fills <paramref name="movieDetails"/>
  /// with whatever fields can be located in the HTML.
  /// </summary>
  /// <param name="url">Hit produced by FindFilm; its Title is trimmed in place.</param>
  /// <param name="movieDetails">Reset first, then populated field by field.</param>
  /// <returns>
  /// <c>true</c> when the page was downloaded and processed; <c>false</c> when the page was
  /// empty or an exception occurred (the exception is logged).
  /// </returns>
  /// <remarks>
  /// A field whose markup cannot be located or parsed is skipped instead of aborting the
  /// whole lookup.
  /// </remarks>
  bool MediaPortal.Video.Database.IIMDBScriptGrabber.GetDetails(MediaPortal.Video.Database.IMDB.IMDBUrl url, ref MediaPortal.Video.Database.IMDBMovie movieDetails)
  {
    try
    {
      int iStart = 0;
      int iEnd = 0;
      movieDetails.Reset();
      // add databaseinfo
      // may add an another grabber
      movieDetails.Database = "imdb_com";

      string strAbsURL;
      string strBody = GetPage(url.URL, "utf-8", out strAbsURL);
      if (strBody == null || strBody.Length == 0)
      {
        return false;
      }

      // The IMDB number is the path segment following "/title/" in the final URL.
      int iPos = strAbsURL.IndexOf("/title/");
      if (iPos > 0)
      {
        iPos += "/title/".Length;
        movieDetails.IMDBNumber = strAbsURL.Substring(iPos);
        int pos = movieDetails.IMDBNumber.IndexOf("/");
        if (pos > 0)
        {
          movieDetails.IMDBNumber = movieDetails.IMDBNumber.Substring(0, pos);
        }
      }

      url.Title = url.Title.Trim();
      // cut of " (imdb)"
      iEnd = url.Title.IndexOf("(");
      if (iEnd >= 0)
      {
        movieDetails.Title = url.Title.Substring(0, iEnd);
      }
      else
      {
        movieDetails.Title = url.Title;
      }
      movieDetails.Title = movieDetails.Title.Trim();
      string movieTitle = System.Web.HttpUtility.HtmlEncode(movieDetails.Title);

      // Locate all section markers up front.
      int iDirectedBy = strBody.IndexOf("Director");
      int iCredits = strBody.IndexOf("Writer");
      int iGenre = strBody.IndexOf("Genre:");
      int iTagLine = strBody.IndexOf("Tagline:</h5>");
      int iPlotOutline = strBody.IndexOf("Plot Outline:</h5>");
      int iPlotSummary = strBody.IndexOf("Plot Summary:</h5>");
      int iPlot = strBody.IndexOf("<a href=\"plotsummary");
      string strImageTag = "<img border=\"0\" alt=\"" + movieTitle + "\" title=\"" + movieTitle + "\" src=\"";
      int iImage = strBody.IndexOf(strImageTag);
      if (iImage >= 0)
      {
        iImage += strImageTag.Length;
      }
      int iRating = strBody.IndexOf("User Rating:</b>");
      int iTop = strBody.IndexOf("Top 250:");
      int iYear = strBody.IndexOf("/Sections/Years/");

      if (iYear >= 0)
      {
        iYear += "/Sections/Years/".Length;
        if (iYear + 4 <= strBody.Length)
        {
          int year;
          if (Int32.TryParse(strBody.Substring(iYear, 4), out year))
          {
            movieDetails.Year = year;
          }
        }
      }

      if (iDirectedBy >= 0)
      {
        movieDetails.Director = ParseAHREFIMDB(strBody, iDirectedBy, url.URL).Trim();
      }
      if (iCredits >= 0)
      {
        movieDetails.WritingCredits = ParseAHREFIMDB(strBody, iCredits, url.URL).Trim();
      }
      if (iGenre >= 0)
      {
        movieDetails.Genre = ParseGenresIMDB(strBody, iGenre, url.URL).Trim();
      }

      if (iRating >= 0) // and votes
      {
        iRating += "User Rating:</b>".Length;
        iStart = strBody.IndexOf("<b>", iRating);
        if (iStart >= 0)
        {
          iStart += "<b>".Length;
          iEnd = strBody.IndexOf("/", iStart);
          if (iEnd >= 0)
          {
            // set rating
            // Normalise the decimal separator and parse culture-independently so the
            // result does not depend on the regional settings of the machine.
            string strRating = strBody.Substring(iStart, iEnd - iStart).Trim().Replace(',', '.');
            double dRating;
            if (Double.TryParse(strRating, System.Globalization.NumberStyles.Float,
                                System.Globalization.CultureInfo.InvariantCulture, out dRating))
            {
              movieDetails.Rating = (float)dRating;
              // Kept from the original code: values above 10 are scaled down by 10.
              if (movieDetails.Rating > 10.0f)
              {
                movieDetails.Rating /= 10.0f;
              }
            }

            if (movieDetails.Rating != 0.0f)
            {
              // now, votes
              movieDetails.Votes = "0";
              iStart = (iEnd + 2 <= strBody.Length) ? strBody.IndexOf("(", iEnd + 2) : -1;
              if (iStart > 0)
              {
                iEnd = strBody.IndexOf(" votes</a>)", iStart);
                if (iEnd > 0)
                {
                  iStart += "(<a href=\"ratings\">".Length; // skip the parenthesis and link before votes
                  if (iEnd >= iStart)
                  {
                    movieDetails.Votes = strBody.Substring(iStart, iEnd - iStart).Trim();
                  }
                }
              }
            }
          }
        }
      }

      if (iTop >= 0) // top rated movie :)
      {
        iTop += "top 250:".Length + 2; // jump space and #
        if (iTop <= strBody.Length)
        {
          iEnd = strBody.IndexOf("</a>", iTop);
          if (iEnd >= 0)
          {
            int top250;
            if (Int32.TryParse(strBody.Substring(iTop, iEnd - iTop), out top250))
            {
              movieDetails.Top250 = top250;
            }
          }
        }
      }

      if (iTagLine >= 0)
      {
        string strTagLine = ReadTextUntilTag(strBody, iTagLine + "Tagline:</h5>".Length);
        if (strTagLine != null)
        {
          movieDetails.TagLine = strTagLine;
        }
      }

      if (iPlotOutline < 0)
      {
        if (iPlotSummary > 0)
        {
          string strSummary = ReadTextUntilTag(strBody, iPlotSummary + "Plot Summary:</h5>".Length);
          if (strSummary != null)
          {
            movieDetails.PlotOutline = strSummary;
          }
        }
      }
      else
      {
        string strOutline = ReadTextUntilTag(strBody, iPlotOutline + "Plot Outline:</h5>".Length);
        if (strOutline != null)
        {
          movieDetails.PlotOutline = strOutline;
          // The outline doubles as the plot until the plot summary page (below) replaces it.
          // It is already decoded, so it is not decoded a second time.
          movieDetails.Plot = movieDetails.PlotOutline.Trim();
        }
      }

      if (iImage >= 0)
      {
        iEnd = strBody.IndexOf("\"", iImage);
        if (iEnd >= 0)
        {
          movieDetails.ThumbURL = strBody.Substring(iImage, iEnd - iImage).Trim();
        }
      }

      //plot
      if (iPlot >= 0)
      {
        string strPlotURL = url.URL + "plotsummary";
        try
        {
          string absoluteUri;
          string strPlotHTML = GetPage(strPlotURL, "utf-8", out absoluteUri);
          if (0 != strPlotHTML.Length)
          {
            int iPlotStart = strPlotHTML.IndexOf("<p class=\"plotpar\">");
            if (iPlotStart >= 0)
            {
              iPlotStart += "<p class=\"plotpar\">".Length;
              int iPlotEnd = strPlotHTML.IndexOf("<i>", iPlotStart); // ends with <i> for person who wrote it or
              if (iPlotEnd < 0)
              {
                iPlotEnd = strPlotHTML.IndexOf("</p>", iPlotStart); // </p> for end of paragraph
              }
              if (iPlotEnd >= 0)
              {
                movieDetails.Plot = strPlotHTML.Substring(iPlotStart, iPlotEnd - iPlotStart);
                movieDetails.Plot = MediaPortal.Util.Utils.stripHTMLtags(movieDetails.Plot);
                movieDetails.Plot = HttpUtility.HtmlDecode(movieDetails.Plot); // remove HTML entities
              }
            }
          }
        }
        catch (Exception ex)
        {
          MediaPortal.GUI.Library.Log.Error("exception for imdb lookup of {0} err:{1} stack:{2}", strPlotURL, ex.Message, ex.StackTrace);
        }
      }

      //cast
      string RegCastBlock = "<table class=\"cast\">.*?</table>";
      string RegActorAndRole = "td class=\"nm\"><a href=./name.*?>(?<actor>.*?)</a><.*?<td class=\"char\">(?<role>.*?)<";
      Match castBlock = Regex.Match(strBody, RegCastBlock);
      // These are some fallback methods to find the block with the cast, in case something
      // changes on IMDB, these may work reasonably well anyway...
      if (!castBlock.Success)
      {
        castBlock = Regex.Match(strBody, @"redited\scast.*?</table>");
      }
      if (!castBlock.Success)
      {
        castBlock = Regex.Match(strBody, @"first\sbilled\sonly.*?</table>");
      }
      if (!castBlock.Success)
      {
        castBlock = Regex.Match(strBody, @"redited\scast.*?more");
      }
      if (!castBlock.Success)
      {
        castBlock = Regex.Match(strBody, @"first\sbilled\sonly.*?more");
      }

      // One "actor[ as role]" line per match, appended to the existing Cast value.
      StringBuilder castLines = new StringBuilder();
      foreach (Match m in Regex.Matches(castBlock.Value, RegActorAndRole))
      {
        string strActor = MediaPortal.Util.Utils.stripHTMLtags(m.Groups["actor"].Value).Trim();
        strActor = HttpUtility.HtmlDecode(strActor);
        string strRole = MediaPortal.Util.Utils.stripHTMLtags(m.Groups["role"].Value).Trim();
        strRole = HttpUtility.HtmlDecode(strRole);

        castLines.Append(strActor);
        if (strRole != string.Empty)
        {
          castLines.Append(" as ").Append(strRole);
        }
        castLines.Append("\n");
      }
      movieDetails.Cast += castLines.ToString();

      int iRunTime = strBody.IndexOf("Runtime:");
      if (iRunTime > 0)
      {
        iRunTime += "Runtime:</h5>".Length;
        // Bounds are tested before indexing so a marker at the very end of the page
        // cannot raise IndexOutOfRangeException.
        while (iRunTime < strBody.Length && !Char.IsDigit(strBody[iRunTime]))
        {
          iRunTime++;
        }
        StringBuilder runtime = new StringBuilder();
        while (iRunTime < strBody.Length && Char.IsDigit(strBody[iRunTime]))
        {
          runtime.Append(strBody[iRunTime]);
          iRunTime++;
        }
        int minutes;
        if (Int32.TryParse(runtime.ToString(), out minutes))
        {
          movieDetails.RunTime = minutes;
        }
      }

      int mpaa = strBody.IndexOf("MPAA</a>:</h5>");
      if (mpaa > 0)
      {
        mpaa += "MPAA</a>:</h5>".Length;
        int mpaaEnd = strBody.IndexOf("</div>", mpaa);
        if (mpaaEnd > 0)
        {
          movieDetails.MPARating = strBody.Substring(mpaa, mpaaEnd - mpaa);
        }
      }

      return true;
    }
    catch (Exception ex)
    {
      MediaPortal.GUI.Library.Log.Error("exception for imdb lookup of {0} err:{1} stack:{2}", url.URL, ex.Message, ex.StackTrace);
    }
    return false;
  }

  /// <summary>Returns the display name of this grabber.</summary>
  string MediaPortal.Video.Database.IIMDBScriptGrabber.GetName()
  {
    return "IMDB grabber ";
  }

  /// <summary>Returns the language code of this grabber.</summary>
  string MediaPortal.Video.Database.IIMDBScriptGrabber.GetLanguage()
  {
    return "EN";
  }

  /// <summary>
  /// Downloads <paramref name="strURL"/> and returns the response body decoded with
  /// the encoding named by <paramref name="strEncode"/>.
  /// </summary>
  /// <param name="strURL">Address to request.</param>
  /// <param name="strEncode">Encoding name passed to <c>Encoding.GetEncoding</c>.</param>
  /// <param name="absoluteUri">Receives the response URI, or an empty string on failure.</param>
  /// <returns>The page text, or an empty string when the request failed (the error is logged).</returns>
  private string GetPage(string strURL, string strEncode, out string absoluteUri)
  {
    string strBody = "";
    absoluteUri = String.Empty;
    try
    {
      // Make the Webrequest
      WebRequest req = WebRequest.Create(strURL);
      using (WebResponse result = req.GetResponse())
      using (Stream receiveStream = result.GetResponseStream())
      {
        // Encoding: depends on selected page
        Encoding encode = System.Text.Encoding.GetEncoding(strEncode);
        using (StreamReader sr = new StreamReader(receiveStream, encode))
        {
          strBody = sr.ReadToEnd();
          // Read the final URI while the response is still open.
          absoluteUri = result.ResponseUri.AbsoluteUri;
        }
      }
    }
    catch (Exception ex)
    {
      // Callers treat an empty body as "nothing found", so report the failure here
      // instead of dropping it silently.
      MediaPortal.GUI.Library.Log.Error("Error retrieving WebPage: {0} Encoding:{1} err:{2} stack:{3}", strURL, strEncode, ex.Message, ex.StackTrace);
    }
    return strBody;
  } // END GetPage()

  /// <summary>
  /// Returns the text from <paramref name="iStart"/> up to the next '&lt;', trimmed,
  /// with HTML tags stripped and HTML entities decoded.
  /// </summary>
  /// <returns>The cleaned text, or <c>null</c> when the start is out of range or no '&lt;' follows.</returns>
  private static string ReadTextUntilTag(string strBody, int iStart)
  {
    if (iStart < 0 || iStart > strBody.Length)
    {
      return null;
    }
    int iEnd = strBody.IndexOf("<", iStart);
    if (iEnd < 0)
    {
      return null;
    }
    string strText = strBody.Substring(iStart, iEnd - iStart).Trim();
    strText = MediaPortal.Util.Utils.stripHTMLtags(strText);
    return HttpUtility.HtmlDecode(strText); // remove HTML entities
  }

  /// <summary>
  /// Returns the cleaned link text of the first anchor found at or after
  /// <paramref name="iahref"/> in <paramref name="strBody"/>.
  /// </summary>
  /// <param name="strBody">HTML to scan.</param>
  /// <param name="iahref">Index at which the search for the anchor starts.</param>
  /// <param name="strURL">Not used by this method.</param>
  /// <returns>The trimmed link text, or an empty string when no complete anchor is found.</returns>
  string ParseAHREFIMDB(string strBody, int iahref, string strURL)
  {
    int iStart = strBody.IndexOf("<a href=\"", iahref);
    if (iStart < 0)
    {
      iStart = strBody.IndexOf("<A HREF=\"", iahref);
    }
    if (iStart < 0)
    {
      return "";
    }

    int iEnd = strBody.IndexOf("</a>", iStart);
    if (iEnd < 0)
    {
      iEnd = strBody.IndexOf("</A>", iStart);
    }
    if (iEnd < 0)
    {
      return "";
    }

    iStart += "<a href=\"".Length;
    int iSep = strBody.IndexOf(">", iStart);
    // The opening tag must be closed before the end tag starts.
    if (iSep < 0 || iSep > iEnd)
    {
      return "";
    }
    iSep++;

    string strTitle = strBody.Substring(iSep, iEnd - iSep);
    strTitle = MediaPortal.Util.Utils.stripHTMLtags(strTitle);
    HTMLUtil htmlUtil = new HTMLUtil();
    htmlUtil.ConvertHTMLToAnsi(strTitle, out strTitle);
    return strTitle.Trim();
  }

  /// <summary>
  /// Collects the genre link texts that follow position <paramref name="iGenre"/> and
  /// joins them with " / ".
  /// </summary>
  /// <param name="strBody">HTML of the details page.</param>
  /// <param name="iGenre">Index of the genre marker; scanning starts here.</param>
  /// <param name="url">Not used by this method.</param>
  /// <returns>The genre names separated by " / ".</returns>
  string ParseGenresIMDB(string strBody, int iGenre, string url)
  {
    string strTmp;
    string strTitle = "";
    string strHRef = strBody.Substring(iGenre);
    int iSlash = strHRef.IndexOf(" / ");
    int iEnd = 0;
    int iStart = 0;
    if (iSlash >= 0)
    {
      // The genre list ends at the "more" link or, failing that, at the closing div.
      int iRealEnd = strHRef.IndexOf(">more<");
      if (iRealEnd < 0)
      {
        iRealEnd = strHRef.IndexOf("</div>");
      }
      // Each chunk between two " / " separators holds one genre anchor.
      while (iSlash < iRealEnd)
      {
        iStart = iEnd + 2;
        iEnd = iSlash;
        int iLen = iEnd - iStart;
        if (iLen < 0)
        {
          break;
        }
        strTmp = strHRef.Substring(iStart, iLen);
        strTitle = strTitle + ParseAHREFIMDB(strTmp, 0, "") + " / ";
        iSlash = strHRef.IndexOf(" / ", iEnd + 2);
        if (iSlash < 0)
        {
          iSlash = iRealEnd;
        }
      }
    }

    // last genre
    iEnd += 2;
    strTmp = strHRef.Substring(iEnd);
    strTitle = strTitle + ParseAHREFIMDB(strTmp, 0, "");
    HTMLUtil htmlUtil = new HTMLUtil();
    htmlUtil.ConvertHTMLToAnsi(strTitle, out strTitle);
    return strTitle;
  }
}